##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:utils':
##
## View
##
## Loading required package: ggplot2
The data set is “Seattle Police Department 911 Incident Response.”
People have called 911 at different times in the city of Seattle for various incidents. Each incident is categorized by the column “initial_type_subgroup”, and described in more detail by the column “initial_type_description”.
The SPD has divided different areas in the city by “district_sectors” which are further subdivided into smaller zones indicated by the column “zone_beat”. Also, each incident has been given a reference number indicated by the column name “cad_event_number” and “cad_cdw_id”.
For each report, a latitude and longitude were recorded, from which we can get the approximate location where the incident/crime occurred. The variable “at_scene_time” gives us the time when the incident/crime was reported through 911 to the SPD.
#Download the SPD 911 incident response records from Seattle's Socrata open data endpoint and parse the JSON into spd911
spd911 <- jsonlite::fromJSON(
  txt = "https://data.seattle.gov/resource/3k2p-39jp.json"
)
#Preview the first 6 rows of the data frame
head(spd911, n = 6L)
## cad_event_number cad_cdw_id zone_beat
## 1 15000035997 581875 K3
## 2 15000035929 581910 Q1
## 3 15000035487 582157 K3
## 4 15000035390 582215 F2
## 5 15000035285 582277 L1
## 6 15000035205 582326 F1
## initial_type_description district_sector
## 1 AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY K
## 2 NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS) Q
## 3 FOOT - ELUDING POLICE K
## 4 AUTO RECOVERY F
## 5 AUTO RECOVERY L
## 6 AUTO RECOVERY F
## initial_type_subgroup incident_location.needs_recoding
## 1 AUTO THEFTS FALSE
## 2 NARCOTICS COMPLAINTS FALSE
## 3 TRAFFIC RELATED CALLS FALSE
## 4 AUTO THEFTS FALSE
## 5 AUTO THEFTS FALSE
## 6 AUTO THEFTS FALSE
## incident_location.longitude incident_location.latitude
## 1 -122.330271593 47.600875809
## 2 -122.37613941 47.636336049
## 3 -122.326350868 47.601708802
## 4 -122.363172642 47.525585666
## 5 -122.304248161 47.727498035
## 6 -122.369833395 47.546493546
## hundred_block_location general_offense_number longitude
## 1 3 AV S / S WASHINGTON ST 201535997 -122.330271593
## 2 20XX BLOCK OF 15 AV W 201535929 -122.376139410
## 3 6 AV / YESLER WY 201535487 -122.326350868
## 4 86XX BLOCK OF 24 AV SW 201535390 -122.363172642
## 5 135XX BLOCK OF 23 AV NE 201535285 -122.304248161
## 6 63XX BLOCK OF 29 AV SW 201535205 -122.369833395
## latitude at_scene_time initial_type_group census_tract
## 1 47.600875809 2015-02-01T00:20:00 AUTO RECOVERIES 9200.2014
## 2 47.636336049 2015-01-31T23:12:00 NARCOTICS COMPLAINTS 5802.2003
## 3 47.601708802 2015-01-31T15:14:00 TRAFFIC RELATED CALLS 9200.1002
## 4 47.525585666 2015-01-31T13:36:00 AUTO RECOVERIES 11401.2005
## 5 47.727498035 2015-01-31T12:08:00 AUTO RECOVERIES 200.6017
## 6 47.546493546 2015-01-31T10:24:00 AUTO RECOVERIES 10700.4001
## event_clearance_code event_clearance_subgroup event_clearance_group
## 1 <NA> <NA> <NA>
## 2 <NA> <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## event_clearance_description
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
#List the name of every column in the data frame
names(spd911)
## [1] "cad_event_number" "cad_cdw_id"
## [3] "zone_beat" "initial_type_description"
## [5] "district_sector" "initial_type_subgroup"
## [7] "incident_location" "hundred_block_location"
## [9] "general_offense_number" "longitude"
## [11] "latitude" "at_scene_time"
## [13] "initial_type_group" "census_tract"
## [15] "event_clearance_code" "event_clearance_subgroup"
## [17] "event_clearance_group" "event_clearance_description"
#Inspect the type and the first few values of every column
utils::str(spd911)
## 'data.frame': 1000 obs. of 18 variables:
## $ cad_event_number : chr "15000035997" "15000035929" "15000035487" "15000035390" ...
## $ cad_cdw_id : chr "581875" "581910" "582157" "582215" ...
## $ zone_beat : chr "K3" "Q1" "K3" "F2" ...
## $ initial_type_description : chr "AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY" "NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)" "FOOT - ELUDING POLICE" "AUTO RECOVERY" ...
## $ district_sector : chr "K" "Q" "K" "F" ...
## $ initial_type_subgroup : chr "AUTO THEFTS" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO THEFTS" ...
## $ incident_location :'data.frame': 1000 obs. of 3 variables:
## ..$ needs_recoding: logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## ..$ longitude : chr "-122.330271593" "-122.37613941" "-122.326350868" "-122.363172642" ...
## ..$ latitude : chr "47.600875809" "47.636336049" "47.601708802" "47.525585666" ...
## $ hundred_block_location : chr "3 AV S / S WASHINGTON ST" "20XX BLOCK OF 15 AV W" "6 AV / YESLER WY" "86XX BLOCK OF 24 AV SW" ...
## $ general_offense_number : chr "201535997" "201535929" "201535487" "201535390" ...
## $ longitude : chr "-122.330271593" "-122.376139410" "-122.326350868" "-122.363172642" ...
## $ latitude : chr "47.600875809" "47.636336049" "47.601708802" "47.525585666" ...
## $ at_scene_time : chr "2015-02-01T00:20:00" "2015-01-31T23:12:00" "2015-01-31T15:14:00" "2015-01-31T13:36:00" ...
## $ initial_type_group : chr "AUTO RECOVERIES" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO RECOVERIES" ...
## $ census_tract : chr "9200.2014" "5802.2003" "9200.1002" "11401.2005" ...
## $ event_clearance_code : chr NA NA NA NA ...
## $ event_clearance_subgroup : chr NA NA NA NA ...
## $ event_clearance_group : chr NA NA NA NA ...
## $ event_clearance_description: chr NA NA NA NA ...
Since our analysis does not use all the columns, it makes sense to get rid of the data that is not important for it. This does not mean that the data is of no use: the removed data could be used in other analyses.
We first remove all such columns, and then we perform further data cleaning by casting certain columns to appropriate data types, which makes the analysis easier for us.
#Columns this analysis does not use: the four event_clearance_* fields and the
#nested incident_location data frame (its longitude/latitude also exist as
#top-level columns). Dropping by name keeps the remaining column order intact.
unused_columns <- c(
  "event_clearance_code",
  "event_clearance_group",
  "event_clearance_subgroup",
  "event_clearance_description",
  "incident_location"
)
spd911 <- spd911[setdiff(names(spd911), unused_columns)]
#Every column arrives as character, so cast the ones we compute with.
#cad_event_number is left as character on purpose: values such as 15000035997
#do not fit in R's 32-bit integer type.
spd911$cad_cdw_id <- as.integer(spd911$cad_cdw_id)
spd911$general_offense_number <- as.integer(spd911$general_offense_number)
spd911$district_sector <- as.factor(spd911$district_sector)
spd911$longitude <- as.numeric(spd911$longitude)
spd911$latitude <- as.numeric(spd911$latitude)
#The timestamps look like "2015-02-01T00:20:00"; swap the literal "T" separator
#for a space so as.POSIXct() can parse them
spd911$at_scene_time <- gsub("T", " ", spd911$at_scene_time, fixed = TRUE)
#Pin the time zone so the parsed instants do not depend on the machine running
#the script (assumes the timestamps are Seattle local time -- TODO confirm
#against the dataset documentation)
spd911$at_scene_time <- as.POSIXct(
  spd911$at_scene_time,
  tz = "America/Los_Angeles"
)
#Look at the structure of the cleaned data frame
str(spd911)
## 'data.frame': 1000 obs. of 13 variables:
## $ cad_event_number : chr "15000035997" "15000035929" "15000035487" "15000035390" ...
## $ cad_cdw_id : int 581875 581910 582157 582215 582277 582326 582470 582573 582638 582653 ...
## $ zone_beat : chr "K3" "Q1" "K3" "F2" ...
## $ initial_type_description: chr "AUTO THEFT - VEH THEFT OR THEFT AND RECOVERY" "NARCOTICS - VIOLATIONS (LOITER, USE, SELL, NARS)" "FOOT - ELUDING POLICE" "AUTO RECOVERY" ...
## $ district_sector : Factor w/ 18 levels "99","B","C","D",..: 9 14 9 6 10 6 15 18 14 15 ...
## $ initial_type_subgroup : chr "AUTO THEFTS" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO THEFTS" ...
## $ hundred_block_location : chr "3 AV S / S WASHINGTON ST" "20XX BLOCK OF 15 AV W" "6 AV / YESLER WY" "86XX BLOCK OF 24 AV SW" ...
## $ general_offense_number : int 201535997 201535929 201535487 201535390 201535285 201535205 201534946 201534755 201534638 201534610 ...
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 47.6 47.6 47.6 47.5 47.7 ...
## $ at_scene_time : POSIXct, format: "2015-02-01 00:20:00" "2015-01-31 23:12:00" ...
## $ initial_type_group : chr "AUTO RECOVERIES" "NARCOTICS COMPLAINTS" "TRAFFIC RELATED CALLS" "AUTO RECOVERIES" ...
## $ census_tract : chr "9200.2014" "5802.2003" "9200.1002" "11401.2005" ...
We want to plot a map of Seattle that shows the areas where the crimes occur, or rather, where the calls to 911 were made from. If we analyze the data points on the map, we can see that although the calls are spread out all over Seattle, there is a high concentration of calls made from the center, close to Capitol Hill and Downtown Seattle. This is something we can look into further: why is the crime rate so high in these areas?
#Change the crimes data into a SpatialPointsDataFrame
coord_columns <- c("longitude", "latitude")
lon <- as.numeric(spd911$longitude)
lat <- as.numeric(spd911$latitude)
#SpatialPointsDataFrame() does not accept missing coordinates, so keep only
#the incidents where both longitude and latitude are present
has_coords <- !is.na(lon) & !is.na(lat)
coords <- cbind(longitude = lon[has_coords], latitude = lat[has_coords])
#Exclude the coordinate columns by name rather than by position, so the
#attribute table stays correct even if the column order changes
crime_attributes <- spd911[
  has_coords,
  setdiff(names(spd911), coord_columns),
  drop = FALSE
]
crime_points <- SpatialPointsDataFrame(coords, crime_attributes)
#Plot just the points where the crime occurred without a map of the city
plot(crime_points, pch = ".", col = "darkred", cex = 4)
#Create a map of Seattle
map <- qmap("Seattle", zoom = 11, maptype = "hybrid")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Seattle&zoom=11&size=640x640&scale=2&maptype=hybrid&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Seattle&sensor=false
#Plot the crime points on top of the map that we created to show the locations where the crimes occurred.
#aes() refers to the columns by bare name so they are looked up in `data`;
#na.rm = TRUE silently drops incidents that have no coordinates.
map +
  geom_point(
    data = spd911,
    aes(x = longitude, y = latitude),
    color = "red",
    size = 3,
    alpha = 0.4,
    na.rm = TRUE
  )
We would also like to see what types of crimes occur in each district sector. To analyze the data further, we plot the frequency of each crime type per sector, with color encoding the sub-sector (zone beat) in which the crimes occurred.
#Create a list that contains all the district sectors in our data frame
#(as plain strings; missing sectors are skipped because they cannot be plotted)
sector_names <- as.character(unique(spd911$district_sector))
district_sector_list <- list(sector_names[!is.na(sector_names)])
#Widen the bottom margin once so the rotated bar labels fit, and remember the
#previous graphics settings so they can be restored after the loop
old_par <- par(mar = c(10, 3, 3, 1))
#Draw one stacked bar chart per district sector
for (sector in district_sector_list[[1]]) {
  #Create a smaller subset data frame for a specific district sector
  in_sector <- !is.na(spd911$district_sector) &
    spd911$district_sector == sector
  subset_sector <- spd911[in_sector, ]
  #Cross-tabulate zone beat (rows) against incident subgroup (columns) so each
  #bar can be split into one colored segment per zone beat
  beat_by_subgroup <- table(
    subset_sector$zone_beat,
    subset_sector$initial_type_subgroup
  )
  #Nothing to draw when every beat or subgroup value in this sector is missing
  if (length(beat_by_subgroup) == 0L) {
    next
  }
  #One palette color per zone beat, shared by the bars and the legend
  beat_colors <- seq_len(nrow(beat_by_subgroup))
  #Keep the original 0-20 axis but grow it when a bar is taller than 20
  y_max <- max(20, colSums(beat_by_subgroup))
  #Create a barplot
  barplot(
    beat_by_subgroup,
    col = beat_colors,
    ylim = c(0, y_max),
    las = 2,
    cex.names = 0.5,
    xlab = "",
    main = paste("District Sector", sector)
  )
  legend(
    "topright",
    pch = 20,
    col = beat_colors,
    legend = rownames(beat_by_subgroup),
    cex = 0.5,
    title = "Zone Beats"
  )
  mtext("Crimes Committed", side = 1, line = 9)
}
#Restore the graphics settings that were in effect before the loop
par(old_par)
One of the biases of our analysis is that we have assumed that each call to 911 was made from, or close to, the location where the crime was committed.